knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.align = 'center')
library(caret)
load(file = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/04.RandomForest/01.ClassifierTraining/BIN_100_24barcodes_Classifier.RData") load("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/04.RandomForest/01.ClassifierTraining/RData/BIN_100_24barcodes_TestData.RData")
Fit1
ROC_Tab <- data.frame(obs = TestData$Class, predict(Fit1, TestData, type = "prob"), pred = predict(Fit1, newdata = TestData))
postResample(pred = ROC_Tab$pred, obs = ROC_Tab$obs)
confusionMatrix(data = ROC_Tab$pred, reference = ROC_Tab$obs)
mean(apply(ROC_Tab[, 3:ncol(ROC_Tab) - 1], 1, max) > 0.2)
ROC_Tab_PPT <- ROC_Tab[apply(ROC_Tab[, 2:11], 1, max) > 0.313, ] postResample(pred = ROC_Tab_PPT$pred, obs = ROC_Tab_PPT$obs)
lapply(seq(5, 95, 5)/100, function(i) { ReadsPercent <- mean(apply(ROC_Tab[, 3:ncol(ROC_Tab) - 1], 1, max) >= i)*100 ROC_Tab_PPT <- ROC_Tab[apply(ROC_Tab[, 3:ncol(ROC_Tab) - 1], 1, max) > i, ] Accu <- postResample(pred = ROC_Tab_PPT$pred, obs = ROC_Tab_PPT$obs)[1] data.frame(ReadsPercent = ReadsPercent, Accuracy = Accu) }) -> Cutoff_Select Cutoff_Select <- do.call(rbind, Cutoff_Select) Cutoff_Select$Cutoff <- seq(5, 95, 5)/100
library(ggplot2) ggplot(Cutoff_Select, aes(ReadsPercent, Accuracy)) + geom_line() + scale_x_reverse() + labs(y = "Accuracy", x = "Percentage of successful reads") + theme_classic() + theme(legend.position = "none", axis.title = element_text(size = 16), axis.text = element_text(size = 12)) + geom_hline(yintercept = 0.99) + geom_vline(xintercept = 77.2)
library(openxlsx) Mat <- openxlsx::read.xlsx("/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/data/Meta/Barcode_RNA-更正终版.xlsx", sheet = 2) Mat <- Mat[, c(1, 2, 5)] Mat <- data.table(Barcode = rep(Mat$Barcode, mapply(length, strsplit(Mat$Gene.Name, "/"))), Name = rep(Mat$Name, mapply(length, strsplit(Mat$Gene.Name, "/"))), Gene = gsub(" ", "", unlist(strsplit(Mat$Gene.Name, "/"))))
intersect(levels(ROC_Tab$obs), Mat[Name %in% c("BC58", "BC78", "D-BC2"), Gene])
library(RColorBrewer) library(pheatmap) sampleDists <- dist(t(ROC_Tab[, 3:ncol(ROC_Tab) - 1]), method = "canberra") sampleDistMatrix <- as.matrix(sampleDists) colnames(sampleDistMatrix) <- NULL row.names(sampleDistMatrix) <- plyr::mapvalues(row.names(sampleDistMatrix), gsub("-", ".", Mat$Gene), Mat$Name) colors <- colorRampPalette(rev(brewer.pal(9, "Blues")) )(255) pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, clustering_distance_cols = sampleDists, col = colors, main = "")
sampleDists <- dist(t(ROC_Tab[, 3:ncol(ROC_Tab) - 1]), method = "manhattan") sampleDistMatrix <- as.matrix(sampleDists) colnames(sampleDistMatrix) <- NULL row.names(sampleDistMatrix) <- plyr::mapvalues(row.names(sampleDistMatrix), gsub("-", ".", Mat$Gene), Mat$Name) colors <- colorRampPalette(rev(brewer.pal(9, "Blues")) )(255) pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, clustering_distance_cols = sampleDists, col = colors, main = "")
sampleDists <- dist(t(ROC_Tab[, 3:ncol(ROC_Tab) - 1]), method = "binary") sampleDistMatrix <- as.matrix(sampleDists) colnames(sampleDistMatrix) <- NULL row.names(sampleDistMatrix) <- plyr::mapvalues(row.names(sampleDistMatrix), gsub("-", ".", Mat$Gene), Mat$Name) colors <- colorRampPalette(rev(brewer.pal(9, "Blues")) )(255) pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, clustering_distance_cols = sampleDists, col = colors, main = "")
setDT(ROC_Tab)
Gene_N <- merge(ROC_Tab[, .N, by = obs], ROC_Tab[, .N, by = pred], by.x = "obs", by.y = "pred") colnames(Gene_N) <- c("Gene", "obs", "pred") Gene_N <- merge(Gene_N, Mat, by = "Gene")
library(ggrepel) ggplot(Gene_N, aes(x = obs, y = pred)) + geom_abline(slope = 1) + geom_point() + theme_classic() + geom_text_repel(aes(label = Name), min.segment.length = 0) + theme(axis.title = element_text(size = 22), axis.text = element_text(size = 16))
Misclassified <- ROC_Tab[pred != obs, ]
Misclassified$obs <- plyr::mapvalues(Misclassified$obs, Mat$Gene, Mat$Name) Misclassified$pred <- plyr::mapvalues(Misclassified$pred, Mat$Gene, Mat$Name)
BCs <- levels(Misclassified$obs) MisPercent <- lapply(BCs, function(x) Misclassified[obs == x, as.data.frame(prop.table(table(pred)) * 100), ]) MisPercent <- as.data.table(data.frame(row = rep(BCs, mapply(nrow, MisPercent)), do.call(rbind, MisPercent))) colnames(MisPercent)[3] <- "Percent" MisPercent[, row := factor(row, levels = levels(MisPercent$pred))]
MisN <- Misclassified[, .N, by = obs][, N] names(MisN) <- Misclassified[, .N, by = obs][, obs] MisP <- ROC_Tab[, mean(pred != obs), by = obs]$V1 names(MisP) <- ROC_Tab[, mean(pred != obs), by = obs]$obs names(MisP) <- plyr::mapvalues(names(MisP), Mat$Gene, Mat$Name) MisNP <- paste0(MisN[BCs], " (", round(MisP[BCs] * 100, 2), "%)") names(MisNP) <- BCs
ggplot(MisPercent, aes(x = as.numeric(pred), y = as.numeric(row), fill = Percent)) + geom_tile() + scale_fill_viridis_c(direction = -1) + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), panel.grid.major = element_blank(), panel.border = element_blank(), panel.background = element_blank(), axis.text.x.bottom = element_text(size = 16, angle = 45, hjust = 1), axis.text.x.top = element_text(size = 12), axis.text.y = element_text(size = 16), plot.title = element_text(size = 16, face = "bold"), axis.ticks = element_blank(), legend.position = "top", legend.text = element_text(size = 12), legend.title = element_text(size = 16)) + scale_x_continuous(breaks = seq_along(BCs), labels = BCs, sec.axis = dup_axis(labels = MisPercent[, sum(Percent), by = pred][, round(V1)]), expand = c(0, 0)) + scale_y_continuous(breaks = seq_along(BCs), labels = BCs, sec.axis = dup_axis(labels = MisNP[BCs]), expand = c(0, 0))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.